library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(ggridges)
library(vegan)
## Loading required package: permute
## Loading required package: lattice
## This is vegan 2.6-8
library(readr)
library(breakaway)
library(rioja)
## This is rioja 1.0-7
# Here we have made our input into smaller 100MB files and now read them in
setwd("rawdata/MetagenomicFirstRound/")
con <- pipe("cat part.gz_* | gunzip -c", "rb")
data <- read_csv(con)
## Rows: 1132002 Columns: 187
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): sample, tax_name, tax_rank, tax_path
## dbl (182): tax_id, N_reads, N_alignments, MAP_damage, MAP_significance, mean...
## lgl (1): MAP_valid
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
close(con)
setwd("../../")
#metadata
metadata <- read.csv("metadata.csv")
# Minimum amount of damage filter
DamMin = 0.0
D_Max = 1.0
#Lambda Likelihood Ratio
LR = 0
# Minimum reads for parsing taxa
MinRead = 1
# Minimum mean read length
MinLength = 35
Here we are looking at some values produced by fastp.
First lets look at the relationship between duplication percentage and ∆Ct (difference between the ideal number fo PCR cycles and used number of cycles). The est age of samples in BP is shown.
plot(metadata$Dct,metadata$duplication.rate,pch=16,ylab="% duplication")
abline(v=0,col="darkred",lwd=3)
text(0.2,60,labels="Underamplified", adj=0)
text(-0.2,60,labels="Overamplified", adj=1)
text(metadata$Dct,metadata$duplication.rate+1.5,labels = metadata$age,cex=0.4)
Now lets look at the low complexity proportion vs ∆Ct
plot(metadata$Dct,metadata$low.complexity,pch=16,ylab="% low complexity")
abline(v=0,col="darkred",lwd=3)
text(0.2,0.32,labels="Underamplified", adj=0)
text(-0.2,0.32,labels="Overamplified", adj=1)
Now how many post-fastp reads there are per sample.
plot(metadata$Dct,metadata$dedupReads,pch=16,ylab="Post fastp reads")
abline(v=0,col="darkred",lwd=3)
text(0.2,2.2*10^8,labels="Underamplified", adj=0)
text(-0.2,2.2*10^8,labels="Overamplified", adj=1)
text(metadata$Dct,metadata$dedupReads+5*10^6,labels = metadata$age,cex=0.4)
First we wrangle the raw data
# Let's make an animal subset of the data
df1 <- data %>% filter(MAP_damage > DamMin, N_reads >= MinRead, mean_L > MinLength, MAP_significance > LR, grepl("Metazoa",tax_path), grepl("^genus", tax_rank), grepl("", sample))
inputdata <- df1
inputdata$sample2 <- factor(inputdata$sample,levels= sort(unique(inputdata$sample),decreasing = TRUE))
inputdata$age <- metadata$age[match(inputdata$sample2,metadata$Sample)]
inputdata$site <- as.factor(metadata$CoreID[match(inputdata$sample2,metadata$Sample)])
raw.metazoa <- inputdata[inputdata$N_reads>99,]
raw.metazoa.50 <- inputdata[inputdata$N_reads>49,]
raw.metazoa.1 <- inputdata
raw.metazoa.50.wide <- raw.metazoa.50 %>%
pivot_wider(
id_cols = tax_name, # Rows will be `tax_name`
names_from = sample, # Columns will be `sample`
values_from = N_reads, # Observations will be `N_reads`
values_fill = 0 # Fill missing values with 0
)
raw.metazoa.1.wide <- raw.metazoa.1 %>%
pivot_wider(
id_cols = tax_name, # Rows will be `tax_name`
names_from = sample, # Columns will be `sample`
values_from = N_reads, # Observations will be `N_reads`
values_fill = 0 # Fill missing values with 0
)
raw.metazoa.wide <- raw.metazoa %>%
pivot_wider(
id_cols = tax_name, # Rows will be `tax_name`
names_from = sample, # Columns will be `sample`
values_from = N_reads, # Observations will be `N_reads`
values_fill = 0 # Fill missing values with 0
)
# plants
df1 <- data %>% filter(MAP_damage > DamMin, N_reads >= MinRead, mean_L > MinLength, MAP_significance > LR, grepl("Viridiplantae",tax_path), grepl("^genus", tax_rank), grepl("", sample))
inputdata <- df1
inputdata$sample2 <- factor(inputdata$sample,levels= sort(unique(inputdata$sample),decreasing = TRUE))
inputdata$age <- metadata$age[match(inputdata$sample2,metadata$Sample)]
inputdata$site <- as.factor(metadata$CoreID[match(inputdata$sample2,metadata$Sample)])
raw.plants <- inputdata[inputdata$N_reads>99,]
raw.plants.50 <- inputdata[inputdata$N_reads>49,]
raw.plants.50.wide <- raw.plants.50 %>%
pivot_wider(
id_cols = tax_name, # Rows will be `tax_name`
names_from = sample, # Columns will be `sample`
values_from = N_reads, # Observations will be `N_reads`
values_fill = 0 # Fill missing values with 0
)
raw.plants.wide <- raw.plants %>%
pivot_wider(
id_cols = tax_name, # Rows will be `tax_name`
names_from = sample, # Columns will be `sample`
values_from = N_reads, # Observations will be `N_reads`
values_fill = 0 # Fill missing values with 0
)
# bacteria
df1 <- data %>% filter(MAP_damage > DamMin, N_reads >= MinRead, mean_L > MinLength, MAP_significance > LR, grepl("Bacteria",tax_path), grepl("^genus", tax_rank), grepl("", sample))
inputdata <- df1
inputdata$sample2 <- factor(inputdata$sample,levels= sort(unique(inputdata$sample),decreasing = TRUE))
inputdata$age <- metadata$age[match(inputdata$sample2,metadata$Sample)]
inputdata$site <- as.factor(metadata$CoreID[match(inputdata$sample2,metadata$Sample)])
raw.bacteria <- inputdata[inputdata$N_reads>99,]
raw.bacteria.50<- inputdata[inputdata$N_reads>49,]
raw.bacteria.50.wide <- raw.bacteria.50 %>%
pivot_wider(
id_cols = tax_name, # Rows will be `tax_name`
names_from = sample, # Columns will be `sample`
values_from = N_reads, # Observations will be `N_reads`
values_fill = 0 # Fill missing values with 0
)
raw.bacteria.wide <- raw.bacteria %>%
pivot_wider(
id_cols = tax_name, # Rows will be `tax_name`
names_from = sample, # Columns will be `sample`
values_from = N_reads, # Observations will be `N_reads`
values_fill = 0 # Fill missing values with 0
)
Now we make some plots, first the animals
ggplot(raw.metazoa, aes(x = MAP_damage, y = sample2, group = sample2)) +
geom_density_ridges2(scale = 3, alpha = 0.55) + # Plot density ridges first
geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Metazoa MAP Damage")+
geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 0.00621
## Picking joint bandwidth of 0.0376
## Picking joint bandwidth of 0.0317
## Picking joint bandwidth of 0.042
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggplot(raw.metazoa, aes(x = mean_L, y = sample2, group = sample2)) +
geom_density_ridges2(scale = 3, alpha = 0.55) + # Plot density ridges first
geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Metazoa Mean Length")+
geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 4.61
## Picking joint bandwidth of 3.38
## Picking joint bandwidth of 3.55
## Picking joint bandwidth of 3.35
## Warning: Removed 11 rows containing missing values or values outside the scale range
## (`geom_text()`).
Now bacteria
ggplot(raw.bacteria, aes(x = MAP_damage, y = sample2, group = sample2)) +
geom_density_ridges2(scale = 3, alpha = 0.55) + # Plot density ridges first
geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Bacteria MAP Damage")+
geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 0.0026
## Picking joint bandwidth of 0.00941
## Picking joint bandwidth of 0.0166
## Picking joint bandwidth of 0.0342
## Warning: Removed 33 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggplot(raw.bacteria, aes(x = mean_L, y = sample2, group = sample2)) +
geom_density_ridges2(scale = 3, alpha = 0.55) + # Plot density ridges first
geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Bacteria Mean Length")+
geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of 7.26
## Picking joint bandwidth of 2.18
## Picking joint bandwidth of 2.83
## Picking joint bandwidth of 3.31
## Warning: Removed 33 rows containing missing values or values outside the scale range
## (`geom_text()`).
Now we make some plots, first the animals
ggplot(raw.plants, aes(x = MAP_damage, y = sample2, group = sample2)) +
geom_density_ridges2(scale = 3, alpha = 0.55) + # Plot density ridges first
geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Plants MAP Damage")+
geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of NaN
## Picking joint bandwidth of 0.027
## Picking joint bandwidth of 0.0257
## Picking joint bandwidth of 0.0324
## Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_text()`).
ggplot(raw.plants, aes(x = mean_L, y = sample2, group = sample2)) +
geom_density_ridges2(scale = 3, alpha = 0.55) + # Plot density ridges first
geom_point(position = position_jitter(height = 0.1, width = 0), alpha = 0.6) + # Add points above ridges
facet_wrap(~ site, scales = "free_y") + # Split into facets by coreID
labs(y = "Samples (ordered by age)", x = "MAP Damage",title = "Plants Mean Length")+
geom_text(aes(x = max(MAP_damage), label = age), hjust = -0.1, size = 3,nudge_y = 0.4)
## Picking joint bandwidth of NaN
## Picking joint bandwidth of 4.2
## Picking joint bandwidth of 3.41
## Picking joint bandwidth of 4.55
## Warning in FUN(X[[i]], ...): no non-missing arguments to max; returning -Inf
## Warning in FUN(X[[i]], ...): Removed 5 rows containing missing values or values outside the scale range
## (`geom_text()`).
Now let’s look at some richness estimates per core
richness <- raw.metazoa.50.wide %>%
select(-tax_name) %>%
summarise(across(everything(), ~ sum(. > 0))) %>%
pivot_longer(cols = everything(), names_to = "Sample", values_to = "Richness")
# Combine richness with metadata
richness_plot_data <- richness %>%
left_join(metadata, by = "Sample")
ggplot(richness_plot_data, aes(x = age, y = Richness, color = CoreID)) +
geom_point(size = 4) +
theme_minimal() +
labs(
title = "Metazoan Genus Richness vs. Age by Core (50 reads)",
x = "Age",
y = "Richness",
color = "Core ID"
) +
theme(
plot.title = element_text(size = 16, hjust = 0.5),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.title = element_text(size = 12)
)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
## More reads in the earlier core portion - lets use breakaway estimates
results <- breakaway_nof1(raw.metazoa.1.wide[,-1])
## Warning in breakaway_nof1.data.frame(., output, plot, answers, print): Assuming
## taxa are rows
named_estimates <- map_dbl(results, ~ .x$estimate)
names(named_estimates) <- names(results)
diversity_df <- tibble(
Sample = names(named_estimates),
Richness = as.numeric(named_estimates)
)
# Merge with metadata
plot_data <- diversity_df %>%
left_join(metadata, by = "Sample")
# Create the plot
ggplot(plot_data, aes(x = age, y = Richness, color = CoreID)) +
geom_point(size = 4) +
theme_minimal() +
labs(
title = "Metazoan Genus Richness (breakaway estimate) vs. Age by Core (50 reads)",
x = "Age",
y = "Richness",
color = "Core ID"
) +
theme(
plot.title = element_text(size = 16, hjust = 0.5),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.title = element_text(size = 12)
)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Analyze and plot for raw.plants.50.wide
richness_plants <- raw.plants.50.wide %>%
select(-tax_name) %>%
summarise(across(everything(), ~ sum(. > 0))) %>%
pivot_longer(cols = everything(), names_to = "Sample", values_to = "Richness")
richness_plot_data_plants <- richness_plants %>%
left_join(metadata, by = "Sample")
ggplot(richness_plot_data_plants, aes(x = age, y = Richness, color = CoreID)) +
geom_point(size = 4) +
theme_minimal() +
labs(
title = "Plant Genus Richness vs. Age by Core (50 reads)",
x = "Age",
y = "Richness",
color = "Core ID"
) +
theme(
plot.title = element_text(size = 16, hjust = 0.5),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.title = element_text(size = 12)
)
## Warning: Removed 2 rows containing missing values or values outside the scale range
## (`geom_point()`).
# Analyze and plot for raw.bacteria.50.wide
richness_bacteria <- raw.bacteria.50.wide %>%
select(-tax_name) %>%
summarise(across(everything(), ~ sum(. > 0))) %>%
pivot_longer(cols = everything(), names_to = "Sample", values_to = "Richness")
richness_plot_data_bacteria <- richness_bacteria %>%
left_join(metadata, by = "Sample")
ggplot(richness_plot_data_bacteria, aes(x = age, y = Richness, color = CoreID)) +
geom_point(size = 4) +
theme_minimal() +
labs(
title = "Bacterial Genus Richness vs. Age by Core (50 reads)",
x = "Age",
y = "Richness",
color = "Core ID"
) +
theme(
plot.title = element_text(size = 16, hjust = 0.5),
axis.title.x = element_text(size = 14),
axis.title.y = element_text(size = 14),
legend.title = element_text(size = 12)
)
## Warning: Removed 3 rows containing missing values or values outside the scale range
## (`geom_point()`).
Now let’s examine the structure between samples
First the animals
raw.metazoa.50.wide.d <- as.data.frame(raw.metazoa.50.wide)
rownames(raw.metazoa.50.wide.d) <- raw.metazoa.50.wide.d[,1]
raw.metazoa.50.wide.d <- raw.metazoa.50.wide.d[,-c(1,2,3)]
par(mfrow = c(1, 2))
metaz.ord <- metaMDS(vegdist(t(raw.metazoa.50.wide.d),method = "jaccard",binary = TRUE),trymax = 200)
## Run 0 stress 0.09047535
## Run 1 stress 0.09620039
## Run 2 stress 0.1034781
## Run 3 stress 0.09724904
## Run 4 stress 0.09255168
## Run 5 stress 0.1037211
## Run 6 stress 0.09047535
## ... New best solution
## ... Procrustes: rmse 6.723257e-05 max resid 0.0003028085
## ... Similar to previous best
## Run 7 stress 0.08949237
## ... New best solution
## ... Procrustes: rmse 0.04889984 max resid 0.2418224
## Run 8 stress 0.09531488
## Run 9 stress 0.1008613
## Run 10 stress 0.09398269
## Run 11 stress 0.1097467
## Run 12 stress 0.102172
## Run 13 stress 0.09637486
## Run 14 stress 0.1060932
## Run 15 stress 0.1026671
## Run 16 stress 0.08949243
## ... Procrustes: rmse 4.785826e-05 max resid 0.0001667614
## ... Similar to previous best
## Run 17 stress 0.1019101
## Run 18 stress 0.1039842
## Run 19 stress 0.09263534
## Run 20 stress 0.09365274
## *** Best solution repeated 1 times
metaz.ord.b <- metaMDS(vegdist(t(raw.metazoa.50.wide.d),method = "bray"),trymax = 200)
## Run 0 stress 0.1159157
## Run 1 stress 0.1380456
## Run 2 stress 0.1189671
## Run 3 stress 0.1294571
## Run 4 stress 0.1345533
## Run 5 stress 0.1364903
## Run 6 stress 0.1221178
## Run 7 stress 0.1333038
## Run 8 stress 0.1234697
## Run 9 stress 0.1135296
## ... New best solution
## ... Procrustes: rmse 0.05435373 max resid 0.2265867
## Run 10 stress 0.1440678
## Run 11 stress 0.137368
## Run 12 stress 0.1416223
## Run 13 stress 0.1200431
## Run 14 stress 0.133896
## Run 15 stress 0.1333023
## Run 16 stress 0.1194143
## Run 17 stress 0.1258652
## Run 18 stress 0.1365786
## Run 19 stress 0.118108
## Run 20 stress 0.1206481
## Run 21 stress 0.1308329
## Run 22 stress 0.1135296
## ... New best solution
## ... Procrustes: rmse 4.465205e-05 max resid 0.0002628834
## ... Similar to previous best
## *** Best solution repeated 1 times
plot(metaz.ord$points[,1],metaz.ord$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord$points),metadata$Sample)])),main="Jaccard 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord$points[,1],metaz.ord$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))
plot(metaz.ord.b$points[,1],metaz.ord.b$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])),main="Bray 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord.b$points[,1],metaz.ord.b$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord.b$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))
Now plants…
raw.plants.50.wide.d <- as.data.frame(raw.plants.50.wide)
rownames(raw.plants.50.wide.d) <- raw.plants.50.wide.d[,1]
raw.plants.50.wide.d <- raw.plants.50.wide.d[,-c(1,2,3)]
par(mfrow = c(1, 2))
metaz.ord <- metaMDS(vegdist(t(raw.plants.50.wide.d),method = "jaccard",binary = TRUE),trymax = 200)
## Run 0 stress 0.1541789
## Run 1 stress 0.1592949
## Run 2 stress 0.152327
## ... New best solution
## ... Procrustes: rmse 0.09168251 max resid 0.4404266
## Run 3 stress 0.151069
## ... New best solution
## ... Procrustes: rmse 0.07541321 max resid 0.4521563
## Run 4 stress 0.4003393
## Run 5 stress 0.1523271
## Run 6 stress 0.1726165
## Run 7 stress 0.1626629
## Run 8 stress 0.1793365
## Run 9 stress 0.1510478
## ... New best solution
## ... Procrustes: rmse 0.002177563 max resid 0.01019873
## Run 10 stress 0.1781054
## Run 11 stress 0.1523271
## Run 12 stress 0.1592951
## Run 13 stress 0.1741706
## Run 14 stress 0.1789104
## Run 15 stress 0.1635321
## Run 16 stress 0.1626628
## Run 17 stress 0.1626626
## Run 18 stress 0.1587963
## Run 19 stress 0.1592949
## Run 20 stress 0.1535169
## Run 21 stress 0.1523273
## Run 22 stress 0.1674026
## Run 23 stress 0.1810176
## Run 24 stress 0.1510476
## ... New best solution
## ... Procrustes: rmse 0.0006422852 max resid 0.00328771
## ... Similar to previous best
## *** Best solution repeated 1 times
metaz.ord.b <- metaMDS(vegdist(t(raw.plants.50.wide.d),method = "bray"),trymax = 200)
## Run 0 stress 0.1206552
## Run 1 stress 0.122504
## Run 2 stress 0.1321074
## Run 3 stress 0.1185575
## ... New best solution
## ... Procrustes: rmse 0.08221816 max resid 0.3337819
## Run 4 stress 0.1383644
## Run 5 stress 0.1036039
## ... New best solution
## ... Procrustes: rmse 0.04040765 max resid 0.2536644
## Run 6 stress 0.122763
## Run 7 stress 0.1230242
## Run 8 stress 0.1384721
## Run 9 stress 0.1299303
## Run 10 stress 0.1184053
## Run 11 stress 0.1253682
## Run 12 stress 0.1035932
## ... New best solution
## ... Procrustes: rmse 0.002858671 max resid 0.01366886
## Run 13 stress 0.1299268
## Run 14 stress 0.103512
## ... New best solution
## ... Procrustes: rmse 0.0154484 max resid 0.07688617
## Run 15 stress 0.1035118
## ... New best solution
## ... Procrustes: rmse 0.00011513 max resid 0.0006656998
## ... Similar to previous best
## Run 16 stress 0.1278738
## Run 17 stress 0.1362423
## Run 18 stress 0.1298699
## Run 19 stress 0.1391909
## Run 20 stress 0.1185575
## *** Best solution repeated 1 times
plot(metaz.ord$points[,1],metaz.ord$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord$points),metadata$Sample)])),main="Jaccard 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord$points[,1],metaz.ord$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))
plot(metaz.ord.b$points[,1],metaz.ord.b$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])),main="Bray 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord.b$points[,1],metaz.ord.b$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord.b$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))
Finally the bacteria
raw.bacteria.50.wide.d <- as.data.frame(raw.bacteria.50.wide)
rownames(raw.bacteria.50.wide.d) <- raw.bacteria.50.wide.d[,1]
raw.bacteria.50.wide.d <- raw.bacteria.50.wide.d[,-c(1,2,3)]
par(mfrow = c(1, 2))
metaz.ord <- metaMDS(vegdist(t(raw.bacteria.50.wide.d),method = "jaccard",binary = TRUE),trymax = 200)
## Run 0 stress 0.07618421
## Run 1 stress 0.0886093
## Run 2 stress 0.08250816
## Run 3 stress 0.07646896
## ... Procrustes: rmse 0.02101119 max resid 0.09510886
## Run 4 stress 0.08839692
## Run 5 stress 0.08250791
## Run 6 stress 0.08747634
## Run 7 stress 0.08374903
## Run 8 stress 0.06992665
## ... New best solution
## ... Procrustes: rmse 0.02497143 max resid 0.1033095
## Run 9 stress 0.08628233
## Run 10 stress 0.07562228
## Run 11 stress 0.06992426
## ... New best solution
## ... Procrustes: rmse 0.0004373639 max resid 0.002003678
## ... Similar to previous best
## Run 12 stress 0.07618411
## Run 13 stress 0.07041131
## ... Procrustes: rmse 0.01902059 max resid 0.08940651
## Run 14 stress 0.08201651
## Run 15 stress 0.07618438
## Run 16 stress 0.08593946
## Run 17 stress 0.0699244
## ... Procrustes: rmse 0.0001687932 max resid 0.0009818997
## ... Similar to previous best
## Run 18 stress 0.08144867
## Run 19 stress 0.07031802
## ... Procrustes: rmse 0.01196637 max resid 0.0733124
## Run 20 stress 0.07032135
## ... Procrustes: rmse 0.01412343 max resid 0.08656295
## *** Best solution repeated 2 times
metaz.ord.b <- metaMDS(vegdist(t(raw.plants.50.wide.d),method = "bray"),trymax = 200)
## Run 0 stress 0.1206552
## Run 1 stress 0.1253682
## Run 2 stress 0.1259867
## Run 3 stress 0.126094
## Run 4 stress 0.1035118
## ... New best solution
## ... Procrustes: rmse 0.0687823 max resid 0.2587032
## Run 5 stress 0.1184052
## Run 6 stress 0.1246365
## Run 7 stress 0.1321073
## Run 8 stress 0.1253755
## Run 9 stress 0.1035118
## ... New best solution
## ... Procrustes: rmse 2.284231e-05 max resid 6.63103e-05
## ... Similar to previous best
## Run 10 stress 0.1227575
## Run 11 stress 0.1206551
## Run 12 stress 0.1245438
## Run 13 stress 0.1184052
## Run 14 stress 0.143572
## Run 15 stress 0.1278739
## Run 16 stress 0.1227576
## Run 17 stress 0.137556
## Run 18 stress 0.1279246
## Run 19 stress 0.1227629
## Run 20 stress 0.1036039
## ... Procrustes: rmse 0.01508932 max resid 0.07600722
## *** Best solution repeated 1 times
plot(metaz.ord$points[,1],metaz.ord$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord$points),metadata$Sample)])),main="Jaccard 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord$points[,1],metaz.ord$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))
plot(metaz.ord.b$points[,1],metaz.ord.b$points[,2],pch=16,col=as.numeric(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])),main="Bray 50")
#ordisurf(metaz.ord,metadata$age[match(rownames(metaz.ord$points),metadata$Sample)],add=TRUE)
text(metaz.ord.b$points[,1],metaz.ord.b$points[,2]+0.05,labels=metadata$age[match(rownames(metaz.ord.b$points),metadata$Sample)],cex=0.8)
legend("topleft",bty='n', col=c(1,2,3),pch=16,legend=levels(as.factor(metadata$CoreID[match(rownames(metaz.ord.b$points),metadata$Sample)])))
Here we are showing taxonomic diversity for all samples across all cores, including only spp with high certainty (>100 reads), showing only 240 out of 322.
par(mfrow = c(1, 1))
raw.metazoa.wide.d <- as.data.frame(raw.metazoa.wide)
rownames(raw.metazoa.wide.d) <- raw.metazoa.wide.d[,1]
raw.metazoa.wide.d <- raw.metazoa.wide.d[,-1]
raw.metazoa.wide.d <- raw.metazoa.wide.d[order(rowSums(raw.metazoa.wide.d),decreasing = TRUE),]
strat.plot(t(raw.metazoa.wide.d)[,1:40],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.metazoa.wide.d)[,41:80],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.metazoa.wide.d)[,81:120],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.metazoa.wide.d)[,121:160],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.metazoa.wide.d)[,161:200],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.metazoa.wide.d)[,201:240],yvar = metadata$age[match(colnames(raw.metazoa.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
Let’s have a look at the plants, showing all 166.
raw.plants.wide.d <- as.data.frame(raw.plants.wide)
rownames(raw.plants.wide.d) <- raw.plants.wide.d[,1]
raw.plants.wide.d <- raw.plants.wide.d[,-1]
raw.plants.wide.d <- raw.plants.wide.d[order(rowSums(raw.plants.wide.d),decreasing = TRUE),]
strat.plot(t(raw.plants.wide.d)[,1:40],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.plants.wide.d)[,41:80],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.plants.wide.d)[,81:120],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.plants.wide.d)[,121:166],yvar = metadata$age[match(colnames(raw.plants.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
Finally the bacteria, showing only 360 out of 1049.
raw.bacteria.wide.d <- as.data.frame(raw.bacteria.wide)
rownames(raw.bacteria.wide.d) <- raw.bacteria.wide.d[,1]
raw.bacteria.wide.d <- raw.bacteria.wide.d[,-1]
raw.bacteria.wide.d <- raw.bacteria.wide.d[order(rowSums(raw.bacteria.wide.d),decreasing = TRUE),]
strat.plot(t(raw.bacteria.wide.d)[,1:40],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,41:80],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,81:120],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,121:160],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,161:200],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,201:240],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,241:280],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,281:320],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")
strat.plot(t(raw.bacteria.wide.d)[,321:360],yvar = metadata$age[match(colnames(raw.bacteria.wide.d),metadata$Sample)],y.rev=TRUE,plot.line=FALSE,lwd.bar=3,ylabel = "Age(CalYr BP)",cex.ylabel = 0.8,cex.xlabel = 1,xLeft = 0.1,yBottom = 0.1,ylabPos=3,las=2,col.bar = "orange")